• Show All Code
  • Hide All Code

P8105 - Final Project

Links to datasets

  • US Election Twitter

  • 2020 Election Forecast

  • 2020 Election

Result Comparison

Results

# County-level presidential results: one row per state/county/candidate.
vote_df <- read_csv(file = "./datasets/president_county_candidate.csv")
## Parsed with column specification:
## cols(
##   state = col_character(),
##   county = col_character(),
##   candidate = col_character(),
##   party = col_character(),
##   total_votes = col_double(),
##   won = col_logical()
## )
# State-level vote totals (columns: state, total_votes).
state_sum <- read_csv(file = "./datasets/president_state.csv")
## Parsed with column specification:
## cols(
##   state = col_character(),
##   total_votes = col_double()
## )
# State -> region/division lookup. `State` is renamed to `state` so the
# column name matches the key used by the election tables.
region_df <- rename(
  read_csv(file = "./datasets/states.csv"),
  state = State
)
## Parsed with column specification:
## cols(
##   State = col_character(),
##   `State Code` = col_character(),
##   Region = col_character(),
##   Division = col_character()
## )
# Annotate every county/candidate row with state-level aggregates:
#   party_total  - votes for the row's party summed over the whole state
#   state_winner - TRUE when that party holds the largest total in the state
#   state_total  - all votes cast in the state
# Reuses `vote_df` (the same CSV, already loaded) instead of reading the file
# a second time, and returns an ungrouped table so later verbs are not
# silently applied per state.
election_winner_df <-
  vote_df %>%
  group_by(state, party) %>%
  mutate(party_total = sum(total_votes)) %>%
  # group_by() replaces the previous grouping, so no ungroup() is needed here
  group_by(state) %>%
  mutate(
    # the comparison already yields TRUE/FALSE (NA stays NA), no case_when needed
    state_winner = party_total == max(party_total),
    state_total = sum(total_votes)
  ) %>%
  ungroup()
## Parsed with column specification:
## cols(
##   state = col_character(),
##   county = col_character(),
##   candidate = col_character(),
##   party = col_character(),
##   total_votes = col_double(),
##   won = col_logical()
## )
# One row per state with the winning candidate, the state's total votes and
# its region. Rows are filtered to winners *before* the join so the lookup is
# not performed for every county/candidate row, and the join key is named
# explicitly so it cannot change if the two tables ever share another column.
winner_region <-
  election_winner_df %>%
  filter(state_winner) %>%
  left_join(region_df, by = "state") %>%
  select(state, candidate, state_total, Region) %>%
  distinct()
## Joining, by = "state"
# Winning candidate per state plus a lower-cased `region` column, which is
# the key used to join against the map polygons.
election_map_df <-
  election_winner_df %>%
  filter(state_winner) %>%
  transmute(
    state,
    candidate,
    party_total,
    state_total,
    region = tolower(state)
  ) %>%
  distinct()

# Polygon outlines for the US "state" map.
usa_map <- map_data(map = "state")

# Attach the election result to every map polygon row. The key is spelled out
# (it was previously inferred as "region") so the join stays stable if either
# table gains another shared column name.
usa_election_map <- left_join(usa_map, election_map_df, by = "region")
## Joining, by = "region"
# Fill colour per candidate name, used by scale_fill_manual().
colors <- c("Donald Trump" = "red", "Joe Biden" = "blue")

# State map filled by winning candidate. The `text` aesthetic is not drawn by
# ggplot2 itself; it carries the tooltip shown by ggplotly().
election_result_map <-
  ggplot(
    usa_election_map,
    aes(
      x = long,
      y = lat,
      group = group,
      fill = candidate,
      text = paste(
        "State: ", state,
        "</br></br>Candidate: ", candidate,
        "</br>Votes: ", party_total,
        "</br>Winning Proportion: ", round(party_total / state_total, 2)
      )
    )
  ) +
  geom_polygon(color = "gray90", size = 0.1) +
  scale_fill_manual(values = colors) +
  labs(title = "Election Results across states") +
  theme_void() +
  # Axis elements are blanked explicitly on top of theme_void().
  # NOTE(review): presumably kept so the plotly conversion shows no axes - confirm.
  theme(
    legend.position = "bottom",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Render as an interactive widget, showing only the custom tooltip text.
ggplotly(p = election_result_map, tooltip = "text")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Election Results with Tweets

# Combine the two Trump tweet files (full outer merge on all shared columns).
trump_df <- merge(
  x = read_csv("./datasets/trump1.csv"),
  y = read_csv("./datasets/trump2.csv"),
  all = TRUE
)

# Drop the auto-named first column (X1), split the tweet-creation and
# user-join timestamps into year/month/day/time parts, and tag the source.
trump_df <- trump_df %>%
  select(-X1) %>%
  separate(
    col = created_at,
    into = c("creation_date", "creation_time"),
    sep = " "
  ) %>%
  separate(
    col = creation_date,
    into = c("creation_year", "creation_month", "creation_day"),
    sep = "-"
  ) %>%
  separate(
    col = user_join_date,
    into = c("join_date", "join_time"),
    sep = " "
  ) %>%
  separate(
    col = join_date,
    into = c("join_year", "join_month", "join_day"),
    sep = "-"
  ) %>%
  mutate(hashtag = "Trump")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X1 = col_double(),
##   created_at = col_datetime(format = ""),
##   tweet_id = col_double(),
##   likes = col_double(),
##   retweet_count = col_double(),
##   user_id = col_double(),
##   user_join_date = col_datetime(format = ""),
##   user_followers_count = col_double(),
##   lat = col_double(),
##   long = col_double(),
##   collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X1 = col_double(),
##   created_at = col_datetime(format = ""),
##   tweet_id = col_double(),
##   likes = col_double(),
##   retweet_count = col_double(),
##   user_id = col_double(),
##   user_join_date = col_datetime(format = ""),
##   user_followers_count = col_double(),
##   lat = col_double(),
##   long = col_double(),
##   collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
# Combine the two Biden tweet files (full outer merge on all shared columns).
biden_df <- merge(
  x = read_csv("./datasets/biden1.csv"),
  y = read_csv("./datasets/biden2.csv"),
  all = TRUE
)

# Drop the auto-named first column (X1), split the tweet-creation and
# user-join timestamps into year/month/day/time parts, and tag the source.
biden_df <- biden_df %>%
  select(-X1) %>%
  separate(
    col = created_at,
    into = c("creation_date", "creation_time"),
    sep = " "
  ) %>%
  separate(
    col = creation_date,
    into = c("creation_year", "creation_month", "creation_day"),
    sep = "-"
  ) %>%
  separate(
    col = user_join_date,
    into = c("join_date", "join_time"),
    sep = " "
  ) %>%
  separate(
    col = join_date,
    into = c("join_year", "join_month", "join_day"),
    sep = "-"
  ) %>%
  mutate(hashtag = "Biden")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X1 = col_double(),
##   created_at = col_datetime(format = ""),
##   tweet_id = col_double(),
##   likes = col_double(),
##   retweet_count = col_double(),
##   user_id = col_double(),
##   user_join_date = col_datetime(format = ""),
##   user_followers_count = col_double(),
##   lat = col_double(),
##   long = col_double(),
##   collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X1 = col_double(),
##   created_at = col_datetime(format = ""),
##   tweet_id = col_double(),
##   likes = col_double(),
##   retweet_count = col_double(),
##   user_id = col_double(),
##   user_join_date = col_datetime(format = ""),
##   user_followers_count = col_double(),
##   lat = col_double(),
##   long = col_double(),
##   collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
# All US tweets from both hashtag sets.
# The two tables have the same columns and always differ in `hashtag`, so a
# full outer merge on every shared column can never match a row; it only
# stacked the tables at join cost. bind_rows() stacks them directly.
# Row order differs from merge() (which sorts by key); the aggregation that
# follows does not depend on it.
tweets_usa <-
  bind_rows(biden_df, trump_df) %>%
  filter(country == "United States of America")
# Polygon outlines for the US "state" map (same call as in the results section).
usa_map <- map_data(map = "state")

# Per state, score each hashtag as (total likes) * (number of tweets), spread
# the two scores into `Biden` / `Trump` columns and record the higher one in
# `top`.
#
# Fixes relative to the previous version:
#   * A hashtag with no tweets in a state is treated as a score of 0 on BOTH
#     sides of the comparison. Before, only `Trump` was coalesced, so a state
#     with Trump tweets but no Biden tweets got `top = NA`.
#   * `state` is selected explicitly (the map tooltip reads it) instead of
#     being re-added implicitly by leftover grouping.
#   * Tweets with a missing like count no longer turn the whole state's score
#     into NA.
# Exact ties still yield `top = NA`.
tweet_map <- tweets_usa %>%
  group_by(state, hashtag) %>%
  summarise(
    count = n(),
    likes = sum(likes, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    likes_tweets = likes * count,
    # lower-case state name is the key used by the map polygons
    region = tolower(state)
  ) %>%
  select(state, region, hashtag, likes_tweets) %>%
  pivot_wider(
    names_from = "hashtag",
    values_from = "likes_tweets"
  ) %>%
  mutate(
    top = case_when(
      coalesce(Biden, 0) > coalesce(Trump, 0) ~ "Biden",
      coalesce(Trump, 0) > coalesce(Biden, 0) ~ "Trump"
    )
  )
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
## Adding missing grouping variables: `state`
# Attach the per-state tweet summary to every map polygon row. The key is
# spelled out (it was previously inferred as "region") so the join stays
# stable if either table gains another shared column name.
states_tweet_map <- left_join(usa_map, tweet_map, by = "region")
## Joining, by = "region"
# Fill colour per hashtag label, used by scale_fill_manual().
colors <- c("Trump" = "red", "Biden" = "blue")

# State map filled by the hashtag with the higher tweet score (`top`). The
# `text` aesthetic is not drawn by ggplot2 itself; it carries the tooltip
# shown by ggplotly().
tweet_result_map <-
  ggplot(
    states_tweet_map,
    aes(
      x = long,
      y = lat,
      group = group,
      fill = top,
      text = paste(
        "State: ", state,
        "</br></br>Candidate: ", top
      )
    )
  ) +
  geom_polygon(color = "gray90", size = 0.1) +
  scale_fill_manual(values = colors) +
  labs(title = "Tweets Results across states") +
  theme_void() +
  # Axis elements are blanked explicitly on top of theme_void().
  # NOTE(review): presumably kept so the plotly conversion shows no axes - confirm.
  theme(
    legend.position = "bottom",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Render as an interactive widget, showing only the custom tooltip text.
ggplotly(p = tweet_result_map, tooltip = "text")